In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
In [245]:
# Identifier of the structure under analysis; it prefixes the input CSV name
# and the name of every SVG file written by the plotting helpers below.
pdb_id = '1OPD'
In [246]:
# Load the per-run RMSD table (first CSV column becomes the index) and
# preview the first three rows.
rmsds = pd.read_csv(
    pdb_id + '.rmsd.csv',
    index_col=0,
)
rmsds.head(3)
Out[246]:
In [247]:
# Number of rows in the loaded table.
len(rmsds)
Out[247]:
In [298]:
# Histogram of the 'rmsd' column. DataFrame.hist returns a 2-D grid of axes,
# so take the single panel at row 0, column 0 before labelling it.
ax = rmsds.hist(figsize=(8, 4), column='rmsd')[0, 0]
_ = ax.set(title="RMSD Histogram", xlabel="RMSD", ylabel="Frequency")
In [250]:
# Disabled: RMSD histograms split by 'group_count' (kept for reference only).
#fig = plt.figure(figsize=(10,6))
#ax = fig.add_subplot(1, 1, 1)
#rmsds.hist(column='rmsd', by='group_count', ax=ax)
In [251]:
def line_plot(df, column, title=None):
    """Plot the 'rmsd' values of ``df`` as one line per distinct ``column`` value.

    The figure is also saved as ``<pdb_id>.<column>.line.svg``, where
    ``pdb_id`` is the notebook-level global.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing an 'rmsd' column and the column named by ``column``.
    column : str
        Name of the column whose distinct values split the rows into lines.
    title : str, optional
        Axes title; left unset when ``None``.

    Returns
    -------
    The matplotlib axes the lines were drawn on.
    """
    fig = plt.figure(figsize=(12, 5))
    ax = fig.add_subplot(1, 1, 1, xlabel="Experimento", ylabel="RMSD")
    # Take the distinct values from the frame that was passed in, not from the
    # global `rmsds`, so the helper also works on filtered or unrelated frames.
    for value in df[column].unique():
        ax.plot(df.loc[df[column] == value, 'rmsd'], label=value)
    ax.legend()
    if title is not None:
        ax.set_title(title)
    fig.savefig(pdb_id + '.' + column + '.line.svg', format='svg')
    return ax
In [252]:
def scatter_plot(df, column, title):
    """Scatter the 'rmsd' column of ``df`` against ``column``.

    The figure is also saved as ``<pdb_id>.<column>.scatter.svg``, where
    ``pdb_id`` is the notebook-level global.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing an 'rmsd' column and the column named by ``column``.
    column : str
        Name of the column plotted on the x axis.
    title : str
        Used both as the axes title and as the x-axis label.

    Returns
    -------
    The matplotlib axes the points were drawn on.
    """
    fig = plt.figure(figsize=(12, 5))
    # Tick positions are the distinct values of the frame being plotted; the
    # global `rmsds` is deliberately not consulted.
    ax = fig.add_subplot(1, 1, 1, xticks=df[column].unique())
    df.plot(column, "rmsd", ax=ax, kind='scatter')
    ax.set(title=title, xlabel=title, ylabel='RMSD')
    fig.savefig(pdb_id + '.' + column + '.scatter.svg', format='svg')
    return ax
In [275]:
def box_plot(df, column, title):
    """Draw box plots of 'rmsd' grouped by ``column`` and save them as SVG.

    The output file is ``<pdb_id>.<column>.boxplot.svg``, where ``pdb_id`` is
    the notebook-level global. ``title`` is used for both the axes title and
    the x-axis label. Returns the axes that were drawn on.
    """
    figure = plt.figure(figsize=(12, 5))
    axes = figure.add_subplot(1, 1, 1, ylabel="RMSD", xlabel=title)

    # One box per distinct value of `column`, with the mean marked.
    df.boxplot(column='rmsd', by=column, ax=axes, showmeans=True)

    # Re-apply the labels after plotting and blank the figure-level title.
    axes.set_title(title)
    axes.set_xlabel(title)
    axes.set_ylabel('RMSD')
    figure.suptitle('')

    svg_name = pdb_id + '.' + column + '.boxplot.svg'
    figure.savefig(svg_name, format='svg')
    return axes
In [276]:
# Re-order the rows so that 'fragment_size' (the column plotted next) is the
# least-significant sort key, then preview the first six rows.
rmsds = rmsds.sort_values(
    by=[
        'group_count',
        'max_templates',
        'max_blast',
        'matrix',
        'fragment_size',
    ]
)
rmsds.head(6)
Out[276]:
In [299]:
# One RMSD line per distinct fragment size.
line_plot(df=rmsds, column='fragment_size', title="Fragment Size")
Out[299]:
In [278]:
# RMSD scattered against fragment size.
scatter_plot(df=rmsds, column='fragment_size', title="Fragment Size")
Out[278]:
In [279]:
# RMSD distribution for each fragment size.
box_plot(df=rmsds, column='fragment_size', title="Fragment Size")
Out[279]:
In [280]:
# Re-order the rows so that 'group_count' (the column plotted next) is the
# least-significant sort key, then preview the first six rows.
rmsds = rmsds.sort_values(
    by=[
        'fragment_size',
        'matrix',
        'max_blast',
        'max_templates',
        'group_count',
    ]
)
rmsds.head(6)
Out[280]:
In [281]:
# One RMSD line per distinct number of groups.
line_plot(df=rmsds, column='group_count', title="Number of Groups")
Out[281]:
In [282]:
# RMSD distribution for each number of groups.
box_plot(df=rmsds, column='group_count', title="Number of Groups")
Out[282]:
In [283]:
# Re-order the rows so that 'max_templates' (the column plotted next) is the
# least-significant sort key, then preview the first six rows.
rmsds = rmsds.sort_values(
    by=[
        'fragment_size',
        'group_count',
        'max_blast',
        'matrix',
        'max_templates',
    ]
)
rmsds.head(6)
Out[283]:
In [284]:
# One RMSD line per distinct number of templates.
line_plot(df=rmsds, column='max_templates', title="Number of Templates")
Out[284]:
In [285]:
# RMSD distribution for each number of templates.
box_plot(df=rmsds, column='max_templates', title="Number of Templates")
Out[285]:
In [286]:
# Re-order the rows so that 'matrix' (the column plotted next) is the
# least-significant sort key, then preview the first four rows.
rmsds = rmsds.sort_values(
    by=[
        'fragment_size',
        'group_count',
        'max_templates',
        'max_blast',
        'matrix',
    ]
)
rmsds.head(4)
Out[286]:
In [287]:
# One RMSD line per substitution matrix.
line_plot(df=rmsds, column='matrix', title="Substitution Matrix")
Out[287]:
In [288]:
# RMSD distribution for each substitution matrix.
box_plot(df=rmsds, column='matrix', title="Substitution Matrix")
Out[288]:
In [302]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
# 3-D view of RMSD over fragment size (x) and number of groups (y).
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(
    111,
    projection='3d',
    xlabel="Fragment Size",
    ylabel="Number of Groups",
    zlabel="RMSD",
)
# Triangulated surface through the samples, drawn with the Blues colormap.
surf = ax.plot_trisurf(
    rmsds['fragment_size'],
    rmsds['group_count'],
    rmsds['rmsd'],
    cmap=cm.Blues,
)
fig.colorbar(surf)
Out[302]: